EEA AQ Stations

Author

Johannes Heisig

Published

October 12, 2023

suppressPackageStartupMessages({
library(dplyr)
library(tidyr)
library(stars)
library(terra)
})

Stations

Create a table with rows for each station and pollutant recorded. Stations have a type (background, industrial, traffic), an area label (rural, urban, suburban), and coordinates in lon/lat.

Station meta data file comes from EEA.

station_meta = read.table("AQ_stations/PanEuropean_metadata.csv", sep = "\t", header = T) |> 
  dplyr::select(AirQualityStationEoICode, Countrycode, Longitude, Latitude,
                StationType = AirQualityStationType, 
                StationArea = AirQualityStationArea,
                AirPollutant = AirPollutantCode) |> 
  dplyr::mutate(AirPollutant = factor(basename(AirPollutant),
                                      levels = c(6001, 5, 8, 7),
                                      labels = c("PM2.5", "PM10", "NO2", "O3"))) |> 
  dplyr::filter(!is.na(AirPollutant)) |> 
  unique() |> 
  dplyr::mutate(AirQualityStationEoICode = as.factor(AirQualityStationEoICode),
                Countrycode = as.factor(Countrycode),
                StationType = as.factor(StationType),
                StationArea = as.factor(StationArea)) |> 
  dplyr::group_by(AirQualityStationEoICode, Countrycode, 
                  StationType, StationArea, AirPollutant) |>
  dplyr::summarise(Longitude = mean(Longitude), Latitude = mean(Latitude), .groups = 'drop') |> 
  dplyr::group_by(AirQualityStationEoICode, StationArea, AirPollutant) |> 
  # if two types registered for 1 station, label them as non-background (n=45).
  dplyr::filter(!(dplyr::n() > 1 & !StationType == "background")) |> 
  dplyr::ungroup()

station_laea = 
  select(station_meta, AirQualityStationEoICode, Longitude, Latitude) |> 
  unique() |> 
  st_as_sf(coords = c("Longitude","Latitude"), crs = 4326) |> 
  st_transform(st_crs(3035))

Supplementary Data

Elevation

elev = "supplementary/EEA_stations_elevation.parquet"

if (!file.exists(elev)){
  dem = stars::read_stars("/vsicurl/https://s3.eu-central-1.wasabisys.com/eumap/dtm/dtm_elev.lowestmode_gedi.eml_mf_30m_0..0cm_2000..2018_eumap_epsg3035_v0.3.tif")
  
  Sys.setenv(GDAL_HTTP_MULTIRANGE = 'YES',
             GDAL_DISABLE_READDIR_ON_OPEN="YES")
  
  tictoc::tic()
  station_laea$Elevation = stars::st_extract(dem, station_laea) |> pull(1)
  arrow::write_parquet(st_drop_geometry(station_laea), elev)
  tictoc::toc()
  
} else {
  station_elevation = arrow::read_parquet(elev) |> 
    mutate(Elevation = ifelse(Elevation < -100, NA, Elevation))
}

(station_laea = left_join(station_laea, station_elevation))
Joining with `by = join_by(AirQualityStationEoICode)`
Warning in sf_column %in% names(g): Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 1608 of `x` matches multiple rows in `y`.
ℹ Row 1608 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship =
  "many-to-many"` to silence this warning.
Simple feature collection with 6164 features and 2 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: -2677029 ymin: -3074758 xmax: 10002190 ymax: 6182022
Projected CRS: ETRS89-extended / LAEA Europe
# A tibble: 6,164 × 3
   AirQualityStationEoICode          geometry Elevation
   <fct>                          <POINT [m]>     <dbl>
 1 4101422                  (5959964 2181947)        NA
 2 4101522                  (5959045 2184719)        NA
 3 AD0942A                  (3625031 2195164)     10444
 4 AD0944A                  (3627252 2195722)     16231
 5 AD0945A                  (3639878 2196313)     24894
 6 AL0203A                  (5233926 2012682)      8455
 7 AL0204A                  (5127863 1973522)      2143
 8 AL0205A                  (5113104 2073920)        46
 9 AL0206A                  (5106425 2184055)      7169
10 AL0207A                  (5168620 2057815)      1272
# ℹ 6,154 more rows

Corine Land Cover

clc_8class = "supplementary/CLC_reclass_8.tif"
if (!file.exists(clc_8class)){
  lc = rast("supplementary/lcv_landcover.clc_corine_c_100m_0..0cm_2018_eumap_epsg3035_v2020.tif")
  NAflag(lc) = 128
  summary(lc)
  rcl = cbind(c(1:48),
              c(1,2,3, rep(4,3), rep(3,3), rep(5,2), rep(6, 11), rep(7,12), rep(8,14)))
  lc8 = classify(lc, rcl, filename = clc_8class,
                 datatype = "INT1U", NAflag = 0, overwrite=T,
                 gdal=c("COMPRESS=DEFLATE"))
} else {
  lc8 = rast(clc_8class)
}
NAflag(lc8) = 128
levels(lc8) =  data.frame(id = c(1:8),
                          CLC8 = c("HDR", "LDR", "IND","TRAF","UGR","AGR","NAT","OTH"))
plot(lc8)

station_laea$CLC8 = extract(lc8, vect(station_laea)) |> pull(2)

Population Density

pop = rast("supplementary/JRC_1K_POP_2018.tif")
plot(pop, breaks = c(0,5,10,50,100,500,1000))

station_laea$Population = extract(pop, vect(station_laea)) |> pull(2) |> replace_na(0)

Result

station_meta = left_join(station_meta, station_laea)
Joining with `by = join_by(AirQualityStationEoICode)`
Warning in left_join(station_meta, station_laea): Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 3767 of `x` matches multiple rows in `y`.
ℹ Row 1 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship =
  "many-to-many"` to silence this warning.
summary(station_meta)
 AirQualityStationEoICode  Countrycode       StationType  
 IT1170A:   24            IT     :2213   background:9952  
 IT2129A:   16            ES     :2054   industrial:2025  
 TR0066A:   16            DE     :1966   traffic   :3904  
 SE0093A:   12            FR     :1885                    
 IT1090A:    9            TR     :1253                    
 E135019:    8            PL     :1024                    
 (Other):15796            (Other):5486                    
         StationArea   AirPollutant   Longitude          Latitude     
 rural         :1817   PM2.5:2862   Min.   :-63.081   Min.   :-21.34  
 rural-nearcity: 253   PM10 :4725   1st Qu.:  3.363   1st Qu.: 41.72  
 rural-regional: 451   NO2  :5123   Median : 10.791   Median : 46.65  
 rural-remote  : 124   O3   :3171   Mean   : 10.811   Mean   : 46.34  
 suburban      :3602                3rd Qu.: 17.961   3rd Qu.: 50.92  
 urban         :9634                Max.   : 55.628   Max.   : 78.91  
                                                                      
          geometry       Elevation          CLC8        Population   
 POINT        :15881   Min.   :  -52   LDR    :7478   Min.   :    0  
 epsg:3035    :    0   1st Qu.:  328   HDR    :3002   1st Qu.:  172  
 +proj=laea...:    0   Median : 1152   AGR    :2032   Median : 2448  
                       Mean   : 2036   IND    :1357   Mean   : 4132  
                       3rd Qu.: 2737   NAT    : 941   3rd Qu.: 5910  
                       Max.   :30185   (Other): 883   Max.   :51127  
                       NA's   :1420    NA's   : 188                  
table(station_meta$AirPollutant, station_meta$StationType)
       
        background industrial traffic
  PM2.5       1851        323     688
  PM10        2830        672    1223
  NO2         2740        661    1722
  O3          2531        369     271
table(station_meta$AirPollutant, station_meta$StationArea)
       
        rural rural-nearcity rural-regional rural-remote suburban urban
  PM2.5   274             46             83           21      617  1821
  PM10    477             77            104           25     1086  2956
  NO2     551             71            113           30     1095  3263
  O3      515             59            151           48      804  1594
table(station_meta$StationArea, station_meta$StationType)
                
                 background industrial traffic
  rural                1434        348      35
  rural-nearcity        201         38      14
  rural-regional        440          7       4
  rural-remote          124          0       0
  suburban             2307        866     429
  urban                5446        766    3422
station_meta_sf = sf::st_as_sf(station_meta, coords = c("Longitude","Latitude"), crs = 4326)

# filter(station_meta_sf, 
#        StationType == "background",
#        AirPollutant == "NO2") |> 
#   mapview::mapview(zcol="StationArea")

# filter(station_meta_sf, is.na(CLC8)) |> 
#   mapview::mapview(zcol="StationArea")
# 
# filter(station_meta_sf, is.na(Elevation)) |> 
#   mapview::mapview(zcol="Countrycode")

station_final = filter(station_meta_sf,
                       !is.na(CLC8),
                       !is.na(Elevation)) |> 
  select(-AirPollutant) |> 
  group_by(AirQualityStationEoICode, Countrycode) |> 
  filter(row_number()==1)

mapview::mapview(station_final, zcol="CLC8")
The legacy packages maptools, rgdal, and rgeos, underpinning the sp package,
which was just loaded, were retired in October 2023.
Please refer to R-spatial evolution reports for details, especially
https://r-spatial.org/r/2023/05/15/evolution4.html.
It may be desirable to make the sf package available;
package maintainers should consider adding sf to Suggests:.
summary(station_final)
 AirQualityStationEoICode  Countrycode       StationType  
 AD0942A:   1             DE     : 869   background:3229  
 AD0944A:   1             IT     : 747   industrial: 723  
 AD0945A:   1             ES     : 715   traffic   :1740  
 AL0203A:   1             FR     : 706                    
 AL0204A:   1             PL     : 461                    
 AL0205A:   1             SE     : 237                    
 (Other):5686             (Other):1957                    
         StationArea            geometry      Elevation          CLC8     
 rural         : 631   POINT        :5692   Min.   :  -52   LDR    :2787  
 rural-nearcity: 110   epsg:4326    :   0   1st Qu.:  339   HDR    :1060  
 rural-regional: 179   +proj=long...:   0   Median : 1121   AGR    : 719  
 rural-remote  :  47                        Mean   : 1995   IND    : 475  
 suburban      :1333                        3rd Qu.: 2676   NAT    : 351  
 urban         :3392                        Max.   :30185   UGR    : 166  
                                                            (Other): 134  
   Population   
 Min.   :    0  
 1st Qu.:  624  
 Median : 2918  
 Mean   : 4503  
 3rd Qu.: 6363  
 Max.   :51127  
                

Write files

bind_cols(st_drop_geometry(station_final), 
      data.frame(st_coordinates(station_final)) |> 
        setNames(c("Longitude","Latitude"))) |> 
  arrow::write_parquet("AQ_stations/EEA_stations_meta.parquet")

geoarrow::write_geoparquet(station_final, "AQ_stations/EEA_stations_meta_sf.parquet")